3ddb79bdN51qpRC-6bOH-v5hl_AK6A xen/common/network.c
3ddb79bdD4SLmmdMD7yLW5HcUWucXw xen/common/page_alloc.c
3e54c38dkHAev597bPr71-hGzTdocg xen/common/perfc.c
+4006e659i9j-doVxY7DKOGU4XVin1Q xen/common/rbtree.c
3ddb79bdHqdQpATqC0rmUZNbsb6L6A xen/common/resource.c
3e397e6619PgAfBbw2XFbXkewvUWgw xen/common/schedule.c
3ddb79bdB9RNMnkQnUyZ5C9hhMSQQw xen/common/slab.c
3e54c38dlSCVdyVM4PKcrSfzLLxWUQ xen/include/xeno/perfc.h
3e54c38de9SUSYSAwxDf_DwkpAnQFA xen/include/xeno/perfc_defn.h
3ddb79c04nQVR3EYM5L4zxDV_MCo1g xen/include/xeno/prefetch.h
+4006e65fWMwLqcocgik6wbF0Eeh0Og xen/include/xeno/rbtree.h
3e4540ccU1sgCx8seIMGlahmMfv7yQ xen/include/xeno/reboot.h
3ddb79c0LzqqS0LhAQ50ekgj4oGl7Q xen/include/xeno/sched.h
3ddb79c0VDeD-Oft5eNfMneTU3D1dQ xen/include/xeno/skbuff.h
l1_pgentry_t *l1tab = NULL, *l1start = NULL;
struct pfn_info *page = NULL;
+ extern void ide_probe_devices(xen_disk_info_t *);
+ extern void scsi_probe_devices(xen_disk_info_t *);
+ xen_disk_info_t xdi;
+ xen_disk_t *xd;
+
/* Sanity! */
if ( p->domain != 0 ) BUG();
if ( (p->flags & PF_CONSTRUCTED) ) BUG();
write_cr3_counted(pagetable_val(current->mm.pagetable));
__sti();
+ /* DOM0 gets access to all real block devices. */
+#define MAX_REAL_DISKS 256
+ xd = kmalloc(MAX_REAL_DISKS * sizeof(xen_disk_t), GFP_KERNEL);
+ xdi.max = MAX_REAL_DISKS;
+ xdi.count = 0;
+ xdi.disks = xd;
+ ide_probe_devices(&xdi);
+ scsi_probe_devices(&xdi);
+ for ( i = 0; i < xdi.count; i++ )
+ {
+ xen_extent_t e;
+ e.device = xd[i].device;
+ e.start_sector = 0;
+ e.nr_sectors = xd[i].capacity;
+ if ( (__vbd_create(p, xd[i].device, VBD_MODE_R|VBD_MODE_W,
+ xd[i].info) != 0) ||
+ (__vbd_grow(p, xd[i].device, &e) != 0) )
+ BUG();
+ }
+ kfree(xd);
+
p->flags |= PF_CONSTRUCTED;
new_thread(p,
--- /dev/null
+/*
+ Red Black Trees
+ (C) 1999 Andrea Arcangeli <andrea@suse.de>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+ linux/lib/rbtree.c
+*/
+
+#include <xeno/rbtree.h>
+/*
+ * Rotate the subtree rooted at 'node' to the left.
+ *
+ * node's right child takes node's place under node's former parent (or
+ * becomes root->rb_node when node had no parent) and node becomes that
+ * child's left child. The child's previous left subtree, if any, is
+ * re-attached as node's right subtree. Colours are not touched.
+ *
+ * node->rb_right is dereferenced unconditionally, so it must be non-NULL.
+ */
+static void __rb_rotate_left(rb_node_t * node, rb_root_t * root)
+{
+ rb_node_t * right = node->rb_right;
+ /* Hand right's left subtree to node, fixing its parent link if present. */
+ if ((node->rb_right = right->rb_left))
+ right->rb_left->rb_parent = node;
+ right->rb_left = node;
+ /* Hook 'right' into node's old position (child slot of parent, or root). */
+ if ((right->rb_parent = node->rb_parent))
+ {
+ if (node == node->rb_parent->rb_left)
+ node->rb_parent->rb_left = right;
+ else
+ node->rb_parent->rb_right = right;
+ }
+ else
+ root->rb_node = right;
+ node->rb_parent = right;
+}
+/*
+ * Rotate the subtree rooted at 'node' to the right (mirror image of
+ * __rb_rotate_left).
+ *
+ * node's left child takes node's place under node's former parent (or
+ * becomes root->rb_node when node had no parent) and node becomes that
+ * child's right child. The child's previous right subtree, if any, is
+ * re-attached as node's left subtree. Colours are not touched.
+ *
+ * node->rb_left is dereferenced unconditionally, so it must be non-NULL.
+ */
+static void __rb_rotate_right(rb_node_t * node, rb_root_t * root)
+{
+ rb_node_t * left = node->rb_left;
+ /* Hand left's right subtree to node, fixing its parent link if present. */
+ if ((node->rb_left = left->rb_right))
+ left->rb_right->rb_parent = node;
+ left->rb_right = node;
+ /* Hook 'left' into node's old position (child slot of parent, or root). */
+ if ((left->rb_parent = node->rb_parent))
+ {
+ if (node == node->rb_parent->rb_right)
+ node->rb_parent->rb_right = left;
+ else
+ node->rb_parent->rb_left = left;
+ }
+ else
+ root->rb_node = left;
+ node->rb_parent = left;
+}
+/*
+ * Recolour and rebalance the tree after 'node' has been linked into it.
+ *
+ * The loop runs for as long as node has a parent and that parent is red.
+ * Each iteration either pushes the problem two levels up (red uncle: recolour
+ * parent, uncle and grandparent, then continue from the grandparent) or
+ * resolves it with at most two rotations around parent/grandparent.
+ * On exit the root is unconditionally coloured black.
+ */
+void rb_insert_color(rb_node_t * node, rb_root_t * root)
+{
+ rb_node_t * parent, * gparent;
+
+ while ((parent = node->rb_parent) && parent->rb_color == RB_RED)
+ {
+ gparent = parent->rb_parent;
+ /* gparent is dereferenced without a NULL check: a red parent is assumed never to be the root. */
+ if (parent == gparent->rb_left)
+ {
+ {
+ register rb_node_t * uncle = gparent->rb_right;
+ if (uncle && uncle->rb_color == RB_RED)
+ {
+ uncle->rb_color = RB_BLACK;
+ parent->rb_color = RB_BLACK;
+ gparent->rb_color = RB_RED;
+ node = gparent;
+ continue;
+ }
+ }
+ /* Uncle is black or absent. If node is parent's right child, rotate it outward and swap the node/parent roles. */
+ if (parent->rb_right == node)
+ {
+ register rb_node_t * tmp;
+ __rb_rotate_left(parent, root);
+ tmp = parent;
+ parent = node;
+ node = tmp;
+ }
+ /* Recolour, then rotate the grandparent down so 'parent' heads this subtree. */
+ parent->rb_color = RB_BLACK;
+ gparent->rb_color = RB_RED;
+ __rb_rotate_right(gparent, root);
+ } else {
+ {
+ register rb_node_t * uncle = gparent->rb_left;
+ if (uncle && uncle->rb_color == RB_RED)
+ {
+ uncle->rb_color = RB_BLACK;
+ parent->rb_color = RB_BLACK;
+ gparent->rb_color = RB_RED;
+ node = gparent;
+ continue;
+ }
+ }
+ /* Mirror image of the case above: node is parent's left child. */
+ if (parent->rb_left == node)
+ {
+ register rb_node_t * tmp;
+ __rb_rotate_right(parent, root);
+ tmp = parent;
+ parent = node;
+ node = tmp;
+ }
+ /* Mirror image: recolour and rotate the grandparent left. */
+ parent->rb_color = RB_BLACK;
+ gparent->rb_color = RB_RED;
+ __rb_rotate_left(gparent, root);
+ }
+ }
+ /* The root always ends up black, whatever the loop did. */
+ root->rb_node->rb_color = RB_BLACK;
+}
+EXPORT_SYMBOL(rb_insert_color);
+/*
+ * Repair the colouring after rb_erase() has unlinked a black node.
+ *
+ * 'node' is the child that now occupies the unlinked node's position (it may
+ * be NULL) and 'parent' is that position's parent. The loop runs while node
+ * is NULL or black and is not the root; it either moves the problem one
+ * level up (sibling recoloured red) or finishes with rotations and breaks.
+ * Afterwards node, if non-NULL, is coloured black.
+ *
+ * The sibling 'other' is dereferenced without a NULL check.
+ * NOTE(review): presumably guaranteed non-NULL by the red-black invariants
+ * of a well-formed tree -- confirm.
+ */
+static void __rb_erase_color(rb_node_t * node, rb_node_t * parent,
+ rb_root_t * root)
+{
+ rb_node_t * other;
+ /* 'other' is node's sibling throughout; the two branches are mirror images. */
+ while ((!node || node->rb_color == RB_BLACK) && node != root->rb_node)
+ {
+ if (parent->rb_left == node)
+ {
+ other = parent->rb_right;
+ if (other->rb_color == RB_RED) /* red sibling: recolour, rotate, re-read sibling */
+ {
+ other->rb_color = RB_BLACK;
+ parent->rb_color = RB_RED;
+ __rb_rotate_left(parent, root);
+ other = parent->rb_right;
+ }
+ if ((!other->rb_left ||
+ other->rb_left->rb_color == RB_BLACK)
+ && (!other->rb_right ||
+ other->rb_right->rb_color == RB_BLACK))
+ { /* both of the sibling's children are black or absent: recolour and move up */
+ other->rb_color = RB_RED;
+ node = parent;
+ parent = node->rb_parent;
+ }
+ else
+ {
+ if (!other->rb_right ||
+ other->rb_right->rb_color == RB_BLACK)
+ { /* only the near (left) child is red: rotate the sibling right first */
+ register rb_node_t * o_left;
+ if ((o_left = other->rb_left))
+ o_left->rb_color = RB_BLACK;
+ other->rb_color = RB_RED;
+ __rb_rotate_right(other, root);
+ other = parent->rb_right;
+ }
+ other->rb_color = parent->rb_color;
+ parent->rb_color = RB_BLACK;
+ if (other->rb_right)
+ other->rb_right->rb_color = RB_BLACK;
+ __rb_rotate_left(parent, root);
+ node = root->rb_node; /* so the final step below blackens the root */
+ break;
+ }
+ }
+ else
+ {
+ other = parent->rb_left;
+ if (other->rb_color == RB_RED) /* red sibling: recolour, rotate, re-read sibling */
+ {
+ other->rb_color = RB_BLACK;
+ parent->rb_color = RB_RED;
+ __rb_rotate_right(parent, root);
+ other = parent->rb_left;
+ }
+ if ((!other->rb_left ||
+ other->rb_left->rb_color == RB_BLACK)
+ && (!other->rb_right ||
+ other->rb_right->rb_color == RB_BLACK))
+ { /* both of the sibling's children are black or absent: recolour and move up */
+ other->rb_color = RB_RED;
+ node = parent;
+ parent = node->rb_parent;
+ }
+ else
+ {
+ if (!other->rb_left ||
+ other->rb_left->rb_color == RB_BLACK)
+ { /* only the near (right) child is red: rotate the sibling left first */
+ register rb_node_t * o_right;
+ if ((o_right = other->rb_right))
+ o_right->rb_color = RB_BLACK;
+ other->rb_color = RB_RED;
+ __rb_rotate_left(other, root);
+ other = parent->rb_left;
+ }
+ other->rb_color = parent->rb_color;
+ parent->rb_color = RB_BLACK;
+ if (other->rb_left)
+ other->rb_left->rb_color = RB_BLACK;
+ __rb_rotate_right(parent, root);
+ node = root->rb_node; /* so the final step below blackens the root */
+ break;
+ }
+ }
+ }
+ if (node)
+ node->rb_color = RB_BLACK;
+}
+/*
+ * Unlink 'node' from the tree rooted at 'root' and rebalance.
+ *
+ * With at most one child, that child (possibly NULL) is spliced into node's
+ * place. With two children, the left-most node of the right subtree is
+ * unlinked from its own position and then takes over node's parent, colour
+ * and children. If the colour that was removed from the tree is black,
+ * __rb_erase_color() is called to repair the colouring.
+ *
+ * This function only unlinks: it neither frees 'node' nor clears its fields.
+ */
+void rb_erase(rb_node_t * node, rb_root_t * root)
+{
+ rb_node_t * child, * parent;
+ int color;
+ /* Zero or one child: 'child' is whatever replaces node (may be NULL). */
+ if (!node->rb_left)
+ child = node->rb_right;
+ else if (!node->rb_right)
+ child = node->rb_left;
+ else
+ {
+ rb_node_t * old = node, * left;
+ /* Two children: from here on 'node' is the left-most node of old's right subtree. */
+ node = node->rb_right;
+ while ((left = node->rb_left))
+ node = left;
+ child = node->rb_right;
+ parent = node->rb_parent;
+ color = node->rb_color;
+ /* Unlink that replacement node from its current position. */
+ if (child)
+ child->rb_parent = parent;
+ if (parent)
+ {
+ if (parent->rb_left == node)
+ parent->rb_left = child;
+ else
+ parent->rb_right = child;
+ }
+ else
+ root->rb_node = child;
+ /* If the replacement was old's direct child, it will itself be child's parent once moved. */
+ if (node->rb_parent == old)
+ parent = node;
+ node->rb_parent = old->rb_parent;
+ node->rb_color = old->rb_color;
+ node->rb_right = old->rb_right;
+ node->rb_left = old->rb_left;
+ /* Point old's parent (or the root) at the replacement. */
+ if (old->rb_parent)
+ {
+ if (old->rb_parent->rb_left == old)
+ old->rb_parent->rb_left = node;
+ else
+ old->rb_parent->rb_right = node;
+ } else
+ root->rb_node = node;
+ /* Re-parent old's children; rb_right may be NULL after the unlink above. */
+ old->rb_left->rb_parent = node;
+ if (old->rb_right)
+ old->rb_right->rb_parent = node;
+ goto color;
+ }
+ /* Simple case: remember node's parent and colour, then splice 'child' in. */
+ parent = node->rb_parent;
+ color = node->rb_color;
+
+ if (child)
+ child->rb_parent = parent;
+ if (parent)
+ {
+ if (parent->rb_left == node)
+ parent->rb_left = child;
+ else
+ parent->rb_right = child;
+ }
+ else
+ root->rb_node = child;
+ /* Only the removal of a black node requires the colouring to be repaired. */
+ color:
+ if (color == RB_BLACK)
+ __rb_erase_color(child, parent, root);
+}
+EXPORT_SYMBOL(rb_erase);
/* query VBD information for self or others (or all) */
if ( (ret = vbd_probe(&op.u.probe_params)) == 0 )
copy_to_user(u_block_io_op, &op, sizeof(op));
- break;
+ break;
case BLOCK_IO_OP_VBD_INFO:
/* query information about a particular VBD */
phys_seg[nr_psegs].nr_sects = nr_sects;
/* Translate the request into the relevant 'physical device' */
- new_segs = vbd_translate(&phys_seg[nr_psegs], p, operation);
-
- /* If it fails we bail (unless the caller is privileged). */
+ new_segs = vbd_translate(&phys_seg[nr_psegs], p, operation);
if ( new_segs < 0 )
{
- if ( unlikely(new_segs != -ENODEV) || unlikely(!IS_PRIV(p)) )
- {
- DPRINTK("access denied: %s of [%ld,%ld] on dev=%04x\n",
- operation == READ ? "read" : "write",
- req->sector_number + tot_sects,
- req->sector_number + tot_sects + nr_sects,
- req->device);
- goto bad_descriptor;
- }
- new_segs = 1;
+ DPRINTK("access denied: %s of [%ld,%ld] on dev=%04x\n",
+ operation == READ ? "read" : "write",
+ req->sector_number + tot_sects,
+ req->sector_number + tot_sects + nr_sects,
+ req->device);
+ goto bad_descriptor;
}
nr_psegs += new_segs;
-/*
- * xen_vbd.c : routines for managing virtual block devices
+/******************************************************************************
+ * xen_vbd.c
+ *
+ * Routines for managing virtual block devices.
+ *
+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
*/
#include <xeno/config.h>
#include <hypervisor-ifs/hypervisor-if.h>
#include <xeno/event.h>
-/*
-** XXX SMH: the below probe functions /append/ their info to the
-** xdi array; i.e. they assume that all earlier slots are correctly
-** filled, and that xdi->count points to the first free entry in
-** the array. All kinda gross but it'll do for now.
-*/
-extern int ide_probe_devices(xen_disk_info_t *xdi);
-extern int scsi_probe_devices(xen_disk_info_t *xdi);
-
-/* XXX SMH: crappy 'hash function' .. fix when care. */
-#define HSH(_x) ((_x) & (VBD_HTAB_SZ - 1))
-
-
-/*
-** Create a new VBD; all this involves is adding an entry to the domain's
-** vbd hash table; caller must be privileged.
-*/
-long vbd_create(vbd_create_t *create)
+long __vbd_create(struct task_struct *p,
+ unsigned short vdevice,
+ unsigned char mode,
+ unsigned char type)
{
- struct task_struct *p;
- vbd_t *new_vbd, **pv;
+ vbd_t *vbd;
+ rb_node_t **rb_p, *rb_parent = NULL;
long ret = 0;
unsigned long cpu_mask;
- if ( unlikely(!IS_PRIV(current)) )
- return -EPERM;
-
- if ( unlikely((p = find_domain_by_id(create->domain)) == NULL) )
- {
- DPRINTK("vbd_create attempted for non-existent domain %d\n",
- create->domain);
- return -EINVAL;
- }
-
spin_lock(&p->vbd_lock);
- for ( pv = &p->vbdtab[HSH(create->vdevice)];
- *pv != NULL;
- pv = &(*pv)->next )
+ rb_p = &p->vbd_rb.rb_node;
+ while ( *rb_p != NULL )
{
- if ( unlikely((*pv)->vdevice == create->vdevice) )
+ rb_parent = *rb_p;
+ vbd = rb_entry(rb_parent, vbd_t, rb);
+ if ( vdevice < vbd->vdevice )
+ {
+ rb_p = &rb_parent->rb_left;
+ }
+ else if ( vdevice > vbd->vdevice )
+ {
+ rb_p = &rb_parent->rb_right;
+ }
+ else
{
DPRINTK("vbd_create attempted for already existing vbd\n");
ret = -EINVAL;
goto out;
}
- if ( (*pv)->vdevice > create->vdevice )
- break;
}
- if ( unlikely((new_vbd = kmalloc(sizeof(vbd_t), GFP_KERNEL)) == NULL) )
+ if ( unlikely((vbd = kmalloc(sizeof(vbd_t), GFP_KERNEL)) == NULL) )
{
DPRINTK("vbd_create: out of memory\n");
ret = -ENOMEM;
goto out;
}
- new_vbd->vdevice = create->vdevice;
- new_vbd->mode = create->mode;
- new_vbd->extents = NULL;
- new_vbd->next = *pv;
+ vbd->vdevice = vdevice;
+ vbd->mode = mode;
+ vbd->type = type;
+ vbd->extents = NULL;
- *pv = new_vbd;
+ rb_link_node(&vbd->rb, rb_parent, rb_p);
+ rb_insert_color(&vbd->rb, &p->vbd_rb);
cpu_mask = mark_guest_event(p, _EVENT_VBD_UPD);
guest_event_notify(cpu_mask);
out:
spin_unlock(&p->vbd_lock);
- put_task_struct(p);
return ret;
}
-/* Grow a VBD by appending a new extent. Fails if the VBD doesn't exist. */
-long vbd_grow(vbd_grow_t *grow)
+/*
+ * Privileged entry point for creating a VBD in the domain named by
+ * create->domain.
+ *
+ * Returns -EPERM if the caller is not privileged and -EINVAL if the target
+ * domain cannot be found; otherwise returns whatever __vbd_create() returns.
+ * VBDs created through this path are always typed as virtual disks
+ * (XD_TYPE_DISK | XD_FLAG_VIRT).
+ */
+long vbd_create(vbd_create_t *create)
{
- struct task_struct *p;
- xen_extent_le_t **px, *x;
- vbd_t *v;
- long ret = 0;
- unsigned long cpu_mask;
+ struct task_struct *p;
+ long rc;
if ( unlikely(!IS_PRIV(current)) )
- return -EPERM;
+ return -EPERM;
- if ( unlikely((p = find_domain_by_id(grow->domain)) == NULL) )
+ if ( unlikely((p = find_domain_by_id(create->domain)) == NULL) )
{
- DPRINTK("vbd_grow: attempted for non-existent domain %d\n",
- grow->domain);
+ DPRINTK("vbd_create attempted for non-existent domain %d\n",
+ create->domain);
return -EINVAL;
}
+ rc = __vbd_create(p, create->vdevice, create->mode,
+ XD_TYPE_DISK | XD_FLAG_VIRT);
+ /* Balance the successful find_domain_by_id() lookup above. */
+ put_task_struct(p);
+
+ return rc;
+}
+
+
+long __vbd_grow(struct task_struct *p,
+ unsigned short vdevice,
+ xen_extent_t *extent)
+{
+ xen_extent_le_t **px, *x;
+ vbd_t *vbd = NULL;
+ rb_node_t *rb;
+ long ret = 0;
+ unsigned long cpu_mask;
+
spin_lock(&p->vbd_lock);
- for ( v = p->vbdtab[HSH(grow->vdevice)]; v != NULL; v = v->next )
- if ( v->vdevice == grow->vdevice )
- break;
+ rb = p->vbd_rb.rb_node;
+ while ( rb != NULL )
+ {
+ vbd = rb_entry(rb, vbd_t, rb);
+ if ( vdevice < vbd->vdevice )
+ rb = rb->rb_left;
+ else if ( vdevice > vbd->vdevice )
+ rb = rb->rb_right;
+ else
+ break;
+ }
- if ( unlikely(v == NULL) )
+ if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != vdevice) )
{
DPRINTK("vbd_grow: attempted to append extent to non-existent VBD.\n");
ret = -EINVAL;
- goto out;
- }
+ goto out;
+ }
if ( unlikely((x = kmalloc(sizeof(xen_extent_le_t), GFP_KERNEL)) == NULL) )
{
goto out;
}
- x->extent.device = grow->extent.device;
- x->extent.start_sector = grow->extent.start_sector;
- x->extent.nr_sectors = grow->extent.nr_sectors;
+ x->extent.device = extent->device;
+ x->extent.start_sector = extent->start_sector;
+ x->extent.nr_sectors = extent->nr_sectors;
x->next = (xen_extent_le_t *)NULL;
- for ( px = &v->extents; *px != NULL; px = &(*px)->next )
+ for ( px = &vbd->extents; *px != NULL; px = &(*px)->next )
continue;
*px = x;
out:
spin_unlock(&p->vbd_lock);
- put_task_struct(p);
return ret;
}
+/*
+ * Grow a VBD by appending a new extent. Fails if the VBD doesn't exist.
+ *
+ * Privileged wrapper around __vbd_grow(): returns -EPERM if the caller is not
+ * privileged and -EINVAL if grow->domain names no existing domain; otherwise
+ * returns the result of __vbd_grow() for grow->vdevice and grow->extent.
+ */
+long vbd_grow(vbd_grow_t *grow)
+{
+ struct task_struct *p;
+ long rc;
+
+ if ( unlikely(!IS_PRIV(current)) )
+ return -EPERM;
+
+ if ( unlikely((p = find_domain_by_id(grow->domain)) == NULL) )
+ {
+ DPRINTK("vbd_grow: attempted for non-existent domain %d\n",
+ grow->domain);
+ return -EINVAL;
+ }
+
+ rc = __vbd_grow(p, grow->vdevice, &grow->extent);
+ /* Balance the successful find_domain_by_id() lookup above. */
+ put_task_struct(p);
+
+ return rc;
+}
+
+
long vbd_shrink(vbd_shrink_t *shrink)
{
struct task_struct *p;
xen_extent_le_t **px, *x;
- vbd_t *v;
+ vbd_t *vbd = NULL;
+ rb_node_t *rb;
long ret = 0;
unsigned long cpu_mask;
spin_lock(&p->vbd_lock);
- for ( v = p->vbdtab[HSH(shrink->vdevice)]; v != NULL; v = v->next )
- if ( v->vdevice == shrink->vdevice )
- break;
+ rb = p->vbd_rb.rb_node;
+ while ( rb != NULL )
+ {
+ vbd = rb_entry(rb, vbd_t, rb);
+ if ( shrink->vdevice < vbd->vdevice )
+ rb = rb->rb_left;
+ else if ( shrink->vdevice > vbd->vdevice )
+ rb = rb->rb_right;
+ else
+ break;
+ }
- if ( unlikely(v == NULL) || unlikely(v->extents == NULL) )
+ if ( unlikely(vbd == NULL) ||
+ unlikely(vbd->vdevice != shrink->vdevice) ||
+ unlikely(vbd->extents == NULL) )
{
DPRINTK("vbd_shrink: attempt to remove non-existent extent.\n");
ret = -EINVAL;
}
/* Find the last extent. We now know that there is at least one. */
- for ( px = &v->extents; (*px)->next != NULL; px = &(*px)->next )
+ for ( px = &vbd->extents; (*px)->next != NULL; px = &(*px)->next )
continue;
x = *px;
struct task_struct *p;
xen_extent_t e;
xen_extent_le_t *new_extents, *x, *t;
- vbd_t *v;
+ vbd_t *vbd = NULL;
+ rb_node_t *rb;
int i;
long ret = 0;
unsigned long cpu_mask;
spin_lock(&p->vbd_lock);
- for ( v = p->vbdtab[HSH(setextents->vdevice)]; v != NULL; v = v->next )
- if ( v->vdevice == setextents->vdevice )
- break;
+ rb = p->vbd_rb.rb_node;
+ while ( rb != NULL )
+ {
+ vbd = rb_entry(rb, vbd_t, rb);
+ if ( setextents->vdevice < vbd->vdevice )
+ rb = rb->rb_left;
+ else if ( setextents->vdevice > vbd->vdevice )
+ rb = rb->rb_right;
+ else
+ break;
+ }
- if ( unlikely(v == NULL) )
+ if ( unlikely(vbd == NULL) ||
+ unlikely(vbd->vdevice != setextents->vdevice) )
{
DPRINTK("vbd_setextents: attempt to modify non-existent VBD.\n");
ret = -EINVAL;
}
/* Delete the old extent list _after_ successfully creating the new. */
- for ( x = v->extents; x != NULL; x = t )
+ for ( x = vbd->extents; x != NULL; x = t )
{
t = x->next;
kfree(x);
}
/* Make the new list visible. */
- v->extents = new_extents;
+ vbd->extents = new_extents;
cpu_mask = mark_guest_event(p, _EVENT_VBD_UPD);
guest_event_notify(cpu_mask);
long vbd_delete(vbd_delete_t *delete)
{
struct task_struct *p;
- vbd_t *v, **pv;
+ vbd_t *vbd;
+ rb_node_t *rb;
xen_extent_le_t *x, *t;
unsigned long cpu_mask;
spin_lock(&p->vbd_lock);
- for ( pv = &p->vbdtab[HSH(delete->vdevice)];
- *pv != NULL;
- pv = &(*pv)->next )
+ rb = p->vbd_rb.rb_node;
+ while ( rb != NULL )
{
- if ( (*pv)->vdevice == delete->vdevice )
+ vbd = rb_entry(rb, vbd_t, rb);
+ if ( delete->vdevice < vbd->vdevice )
+ rb = rb->rb_left;
+ else if ( delete->vdevice > vbd->vdevice )
+ rb = rb->rb_right;
+ else
goto found;
}
-
+
DPRINTK("vbd_delete attempted for non-existing VBD.\n");
spin_unlock(&p->vbd_lock);
return -EINVAL;
found:
- v = *pv;
- *pv = v->next;
- x = v->extents;
- kfree(v);
+ rb_erase(rb, &p->vbd_rb);
+ x = vbd->extents;
+ kfree(vbd);
while ( x != NULL )
{
void destroy_all_vbds(struct task_struct *p)
{
- int i;
- vbd_t *v;
+ vbd_t *vbd;
+ rb_node_t *rb;
xen_extent_le_t *x, *t;
unsigned long cpu_mask;
spin_lock(&p->vbd_lock);
- for ( i = 0; i < VBD_HTAB_SZ; i++ )
+
+ while ( (rb = p->vbd_rb.rb_node) != NULL )
{
- while ( (v = p->vbdtab[i]) != NULL )
+ vbd = rb_entry(rb, vbd_t, rb);
+
+ rb_erase(rb, &p->vbd_rb);
+ x = vbd->extents;
+ kfree(vbd);
+
+ while ( x != NULL )
{
- p->vbdtab[i] = v->next;
-
- x = v->extents;
- kfree(v);
-
- while ( x != NULL )
- {
- t = x->next;
- kfree(x);
- x = t;
- }
- }
+ t = x->next;
+ kfree(x);
+ x = t;
+ }
}
cpu_mask = mark_guest_event(p, _EVENT_VBD_UPD);
}
-/*
- * vbd_probe_devices:
- *
- * add the virtual block devices for this domain to a xen_disk_info_t;
- * we assume xdi->count points to the first unused place in the array.
- */
-static int vbd_probe_devices(xen_disk_info_t *xdi, struct task_struct *p)
+static int vbd_probe_single(xen_disk_info_t *xdi,
+ vbd_t *vbd,
+ struct task_struct *p)
{
xen_extent_le_t *x;
xen_disk_t cur_disk;
- vbd_t *v;
- int i;
- spin_lock(&p->vbd_lock);
+ if ( xdi->count == xdi->max )
+ {
+ DPRINTK("vbd_probe_devices: out of space for probe.\n");
+ return -ENOMEM;
+ }
- for ( i = 0; i < VBD_HTAB_SZ; i++ )
+ cur_disk.device = vbd->vdevice;
+ cur_disk.info = vbd->type;
+ if ( !VBD_CAN_WRITE(vbd) )
+ cur_disk.info |= XD_FLAG_RO;
+ cur_disk.capacity = 0 ;
+ for ( x = vbd->extents; x != NULL; x = x->next )
+ cur_disk.capacity += x->extent.nr_sectors;
+ cur_disk.domain = p->domain;
+
+ /* Now copy into relevant part of user-space buffer */
+ if( copy_to_user(&xdi->disks[xdi->count],
+ &cur_disk,
+ sizeof(xen_disk_t)) )
{
- for ( v = p->vbdtab[i]; v != NULL; v = v->next )
- {
- if ( xdi->count == xdi->max )
- {
- DPRINTK("vbd_probe_devices: out of space for probe.\n");
- spin_unlock(&p->vbd_lock);
- return -ENOMEM;
- }
-
- cur_disk.device = v->vdevice;
- cur_disk.info = XD_FLAG_VIRT | XD_TYPE_DISK;
- if ( !VBD_CAN_WRITE(v) )
- cur_disk.info |= XD_FLAG_RO;
- cur_disk.capacity = 0 ;
- for ( x = v->extents; x != NULL; x = x->next )
- cur_disk.capacity += x->extent.nr_sectors;
- cur_disk.domain = p->domain;
-
- /* Now copy into relevant part of user-space buffer */
- if( copy_to_user(&xdi->disks[xdi->count],
- &cur_disk,
- sizeof(xen_disk_t)) )
- {
- DPRINTK("vbd_probe_devices: copy_to_user failed\n");
- spin_unlock(&p->vbd_lock);
- return -EFAULT;
- }
+ DPRINTK("vbd_probe_devices: copy_to_user failed\n");
+ return -EFAULT;
+ }
- xdi->count++;
+ xdi->count++;
+
+ return 0;
+}
+
+/*
+ * Report every VBD of domain 'p' into 'xdi' by calling vbd_probe_single() on
+ * each node of the domain's VBD red-black tree.
+ *
+ * The tree is walked in order (left subtree, node, right subtree) without
+ * recursion, using the nodes' parent pointers to climb back up. The walk
+ * stops at the first non-zero return from vbd_probe_single(), and that value
+ * is returned; 0 is returned for an empty tree or a complete walk.
+ * p->vbd_lock is held for the whole walk.
+ */
+static int vbd_probe_devices(xen_disk_info_t *xdi, struct task_struct *p)
+{
+ int rc = 0;
+ rb_node_t *rb;
+
+ spin_lock(&p->vbd_lock);
+ /* Empty tree: nothing to report, rc stays 0. */
+ if ( (rb = p->vbd_rb.rb_node) == NULL )
+ goto out;
+
+ new_subtree:
+ /* STEP 1. Find least node (it'll be left-most). */
+ while ( rb->rb_left != NULL )
+ rb = rb->rb_left;
+
+ for ( ; ; )
+ {
+ /* STEP 2. Dealt with left subtree. Now process current node. */
+ if ( (rc = vbd_probe_single(xdi, rb_entry(rb, vbd_t, rb), p)) != 0 )
+ goto out;
+
+ /* STEP 3. Process right subtree, if any. */
+ if ( rb->rb_right != NULL )
+ {
+ rb = rb->rb_right;
+ goto new_subtree;
}
- }
+ /* STEP 4. Done both subtrees. Head back through ancestors. */
+ for ( ; ; )
+ {
+ /* We're done when we get back to the root node. */
+ if ( rb->rb_parent == NULL )
+ goto out;
+ /* If we are left of parent, then parent is next to process. */
+ if ( rb->rb_parent->rb_left == rb )
+ break;
+ /* If we are right of parent, then we climb to grandparent. */
+ rb = rb->rb_parent;
+ }
+ /* We were a left child: the parent is the next node in order. */
+ rb = rb->rb_parent;
+ }
+
+ out:
spin_unlock(&p->vbd_lock);
- return 0;
+ return rc;
}
/*
-** Return information about the VBDs available for a given domain,
-** or for all domains; in the general case the 'domain' argument
-** will be 0 which means "information about the caller"; otherwise
-** the 'domain' argument will specify either a given domain, or
-** all domains ("VBD_PROBE_ALL") -- both of these cases require the
-** caller to be privileged.
-*/
+ * Return information about the VBDs available for a given domain, or for all
+ * domains; in the general case the 'domain' argument will be 0 which means
+ * "information about the caller"; otherwise the 'domain' argument will
+ * specify either a given domain, or all domains ("VBD_PROBE_ALL") -- both of
+ * these cases require the caller to be privileged.
+ */
long vbd_probe(vbd_probe_t *probe)
{
struct task_struct *p = NULL;
if ( probe->domain != 0 )
{
- /* We can only probe for ourselves unless we're privileged. */
+ /* We can only probe for ourselves (unless we're privileged). */
if( (probe->domain != current->domain) && !IS_PRIV(current) )
return -EPERM;
get_task_struct(p); /* to mirror final put_task_struct */
}
- if ( (probe->domain == VBD_PROBE_ALL) || IS_PRIV(p) )
- {
- /* Privileged domains always get access to the 'real' devices. */
- if ( ((ret = ide_probe_devices(&probe->xdi)) != 0) ||
- ((ret = scsi_probe_devices(&probe->xdi)) != 0) )
- goto out;
- }
-
if ( probe->domain == VBD_PROBE_ALL )
{
read_lock_irqsave(&tasklist_lock, flags);
struct task_struct *p;
xen_extent_le_t *x;
xen_extent_t *extents;
- vbd_t *v;
+ vbd_t *vbd = NULL;
+ rb_node_t *rb;
long ret = 0;
if ( (info->domain != current->domain) && !IS_PRIV(current) )
spin_lock(&p->vbd_lock);
- for ( v = p->vbdtab[HSH(info->vdevice)]; v != NULL; v = v->next )
- if ( v->vdevice == info->vdevice )
- break;
+ rb = p->vbd_rb.rb_node;
+ while ( rb != NULL )
+ {
+ vbd = rb_entry(rb, vbd_t, rb);
+ if ( info->vdevice < vbd->vdevice )
+ rb = rb->rb_left;
+ else if ( info->vdevice > vbd->vdevice )
+ rb = rb->rb_right;
+ else
+ break;
+ }
- if ( v == NULL )
+ if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != info->vdevice) )
{
DPRINTK("vbd_info attempted on non-existent VBD.\n");
ret = -EINVAL;
goto out;
}
- info->mode = v->mode;
+ info->mode = vbd->mode;
info->nextents = 0;
extents = info->extents;
- for ( x = v->extents; x != NULL; x = x->next )
+ for ( x = vbd->extents; x != NULL; x = x->next )
{
if ( info->nextents == info->maxextents )
break;
int vbd_translate(phys_seg_t *pseg, struct task_struct *p, int operation)
{
xen_extent_le_t *x;
- vbd_t *v;
+ vbd_t *vbd;
+ rb_node_t *rb;
unsigned long sec_off, nr_secs;
spin_lock(&p->vbd_lock);
- for ( v = p->vbdtab[HSH(pseg->dev)]; v != NULL; v = v->next )
- if ( v->vdevice == pseg->dev )
- goto found;
+ rb = p->vbd_rb.rb_node;
+ while ( rb != NULL )
+ {
+ vbd = rb_entry(rb, vbd_t, rb);
+ if ( pseg->dev < vbd->vdevice )
+ rb = rb->rb_left;
+ else if ( pseg->dev > vbd->vdevice )
+ rb = rb->rb_right;
+ else
+ goto found;
+ }
- if ( unlikely(!IS_PRIV(p)) )
- DPRINTK("vbd_translate; domain %d attempted to access "
- "non-existent VBD.\n", p->domain);
+ DPRINTK("vbd_translate; domain %d attempted to access "
+ "non-existent VBD.\n", p->domain);
spin_unlock(&p->vbd_lock);
return -ENODEV;
found:
- if ( ((operation == READ) && !VBD_CAN_READ(v)) ||
- ((operation == WRITE) && !VBD_CAN_WRITE(v)) )
+ if ( ((operation == READ) && !VBD_CAN_READ(vbd)) ||
+ ((operation == WRITE) && !VBD_CAN_WRITE(vbd)) )
{
spin_unlock(&p->vbd_lock);
return -EACCES;
*/
sec_off = pseg->sector_number;
nr_secs = pseg->nr_sects;
- for ( x = v->extents; x != NULL; x = x->next )
+ for ( x = vbd->extents; x != NULL; x = x->next )
{
if ( sec_off < x->extent.nr_sectors )
{
#include <asm/domain_page.h>
#include <asm/io.h>
-#define NR_IDE_DEVS 20
-
-static kdev_t ide_devs[NR_IDE_DEVS] = {
+static kdev_t ide_devs[] = {
MKDEV(IDE0_MAJOR, 0), MKDEV(IDE0_MAJOR, 64), /* hda, hdb */
MKDEV(IDE1_MAJOR, 0), MKDEV(IDE1_MAJOR, 64), /* hdc, hdd */
MKDEV(IDE2_MAJOR, 0), MKDEV(IDE2_MAJOR, 64), /* hde, hdf */
MKDEV(IDE9_MAJOR, 0), MKDEV(IDE9_MAJOR, 64) /* hds, hdt */
};
-
-
-
-int ide_probe_devices(xen_disk_info_t* xdi)
+void ide_probe_devices(xen_disk_info_t* xdi)
{
- int loop, ret = 0;
- unsigned int unit;
- unsigned short type;
+ int i, unit;
ide_drive_t *drive;
- xen_disk_t cur_disk;
+ xen_disk_t *xd = &xdi->disks[xdi->count];
- for ( loop = 0; loop < MAX_HWIFS; loop++ )
+ for ( i = 0; i < MAX_HWIFS; i++ )
{
- ide_hwif_t *hwif = &ide_hwifs[loop];
+ ide_hwif_t *hwif = &ide_hwifs[i];
if ( !hwif->present ) continue;
for ( unit = 0; unit < MAX_DRIVES; unit++ )
{
drive = &hwif->drives[unit];
- if ( !drive->present ) continue;
-
-
- /* SMH: don't ever expect this to happen, hence verbose printk */
- if ( xdi->count == xdi->max ) {
- printk("ide_probe_devices: out of space for probe.\n");
- return -ENOMEM;
- }
+ if ( !drive->present )
+ continue;
-
-
- /* SMH: we export 'raw' linux device numbers to domain 0 */
- cur_disk.device = ide_devs[(loop * MAX_DRIVES) + unit];
+ if ( xdi->count == xdi->max )
+ BUG();
- /*
- ** NB: we use the ide 'media' field (ide_disk, ide_cdrom, etc)
- ** as our 'type' field (XD_TYPE_DISK, XD_TYPE_CDROM, etc).
- ** Hence must ensure these are kept in sync.
- */
- cur_disk.info = (type = drive->media);
- if(type == XD_TYPE_CDROM)
- cur_disk.info |= XD_FLAG_RO;
+ /* We export 'raw' linux device numbers to domain 0. */
+ xd->device = ide_devs[(i * MAX_DRIVES) + unit];
- cur_disk.capacity = current_capacity(drive);
- cur_disk.domain = 0; /* 'physical' disks belong to domain 0 */
+ /*
+ * NB: we use the ide 'media' field (ide_disk, ide_cdrom, etc) as
+ * our 'type' field (XD_TYPE_DISK, XD_TYPE_CDROM, etc). Hence must
+ * ensure these are kept in sync.
+ */
+ if ( (xd->info = drive->media) == XD_TYPE_CDROM )
+ xd->info |= XD_FLAG_RO;
- /* Now copy into relevant part of user-space buffer */
- if((ret = copy_to_user(xdi->disks + xdi->count, &cur_disk,
- sizeof(xen_disk_t))) < 0) {
- printk("ide_probe_devices: copy_to_user failed [rc=%d]\n",
- ret);
- return ret;
- }
+ xd->capacity = current_capacity(drive);
+ xd->domain = 0;
xdi->count++;
+ xd++;
}
}
-
- return ret;
}
#include <xeno/config.h>
#include <xeno/module.h>
-
-/* #include <xeno/fs.h> */
-/* #include <xeno/kernel.h> */
#include <xeno/sched.h>
-/* #include <xeno/mm.h> */
-/* #include <xeno/string.h> */
#include <xeno/hdreg.h>
-/* #include <xeno/errno.h> */
-/* #include <xeno/interrupt.h> */
#include <xeno/init.h>
-/* #include <xeno/smp.h> */
-
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/io.h>
**
*/
-#define NR_SCSI_DEVS 16
-
-static kdev_t scsi_devs[NR_SCSI_DEVS] = {
+static kdev_t scsi_devs[] = {
MKDEV(SCSI_DISK0_MAJOR, 0), MKDEV(SCSI_DISK0_MAJOR, 16), /* sda, sdb */
MKDEV(SCSI_DISK0_MAJOR, 32), MKDEV(SCSI_DISK0_MAJOR, 48), /* sdc, sdd */
MKDEV(SCSI_DISK0_MAJOR, 64), MKDEV(SCSI_DISK0_MAJOR, 80), /* sde, sdf */
};
-int scsi_probe_devices(xen_disk_info_t *xdi)
+void scsi_probe_devices(xen_disk_info_t *xdi)
{
+ int i;
Scsi_Disk *sd;
- xen_disk_t cur_disk;
- int i, ret;
+ xen_disk_t *xd = &xdi->disks[xdi->count];
for ( sd = rscsi_disks, i = 0; i < sd_template.dev_max; i++, sd++ )
{
- if ( sd->device == NULL ) continue;
+ if ( sd->device == NULL )
+ continue;
- /* SMH: don't ever expect this to happen, hence verbose printk */
- if ( xdi->count == xdi->max ) {
- printk("scsi_probe_devices: out of space for probe.\n");
- return -ENOMEM;
- }
+ if ( xdi->count == xdi->max )
+ BUG();
+
+ /* We export 'raw' linux device numbers to domain 0. */
+ xd->device = scsi_devs[i];
+ xd->info = XD_TYPE_DISK; /* XXX should determine properly */
+ xd->capacity = sd->capacity;
+ xd->domain = 0;
- /* SMH: we export 'raw' linux device numbers to domain 0 */
- cur_disk.device = scsi_devs[i];
- cur_disk.info = XD_TYPE_DISK; // XXX SMH: should determine properly
- cur_disk.capacity = sd->capacity;
- cur_disk.domain = 0; // 'physical' disks belong to dom0
-
- /* Now copy into relevant part of user-space buffer */
- if((ret = copy_to_user(xdi->disks + xdi->count, &cur_disk,
- sizeof(xen_disk_t))) < 0) {
- printk("scsi_probe_devices: copy_to_user failed [rc=%d]\n", ret);
- return ret;
- }
-
xdi->count++;
+ xd++;
}
-
- return 0;
}
#define MULTIBOOT_BOOTLOADER_MAGIC 0x2BADB002
/* The symbol table for a.out. */
-typedef struct aout_symbol_table
+typedef struct
{
unsigned long tabsize;
unsigned long strsize;
} aout_symbol_table_t;
/* The section header table for ELF. */
-typedef struct elf_section_header_table
+typedef struct
{
unsigned long num;
unsigned long size;
} elf_section_header_table_t;
/* The Multiboot information. */
-typedef struct multiboot_info
+typedef struct
{
unsigned long flags;
unsigned long mem_lower;
} multiboot_info_t;
/* The module structure. */
-typedef struct module
+typedef struct
{
unsigned long mod_start;
unsigned long mod_end;
/* The memory map. Be careful that the offset 0 is base_addr_low
but no size. */
-typedef struct memory_map
+typedef struct
{
unsigned long size;
unsigned long base_addr_low;
--- /dev/null
+/*
+ Red Black Trees
+ (C) 1999 Andrea Arcangeli <andrea@suse.de>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+ linux/include/linux/rbtree.h
+
+ To use rbtrees you'll have to implement your own insert and search cores.
+ This avoids the use of callbacks, which would hurt performance dramatically.
+ I know it's not the cleanest way, but in C (not in C++) that is the price
+ of getting both performance and genericity...
+
+ Some examples of insert and search follow here. The search is a plain
+ normal search over an ordered tree. The insert instead must be implemented
+ in two steps: first the code must insert the element in
+ order as a red leaf in the tree, then the support library function
+ rb_insert_color() must be called. That function will do the
+ non-trivial work of rebalancing the rbtree if necessary.
+
+-----------------------------------------------------------------------
+static inline struct page * rb_search_page_cache(struct inode * inode,
+ unsigned long offset)
+{
+ rb_node_t * n = inode->i_rb_page_cache.rb_node;
+ struct page * page;
+
+ while (n)
+ {
+ page = rb_entry(n, struct page, rb_page_cache);
+
+ if (offset < page->offset)
+ n = n->rb_left;
+ else if (offset > page->offset)
+ n = n->rb_right;
+ else
+ return page;
+ }
+ return NULL;
+}
+
+static inline struct page * __rb_insert_page_cache(struct inode * inode,
+ unsigned long offset,
+ rb_node_t * node)
+{
+ rb_node_t ** p = &inode->i_rb_page_cache.rb_node;
+ rb_node_t * parent = NULL;
+ struct page * page;
+
+ while (*p)
+ {
+ parent = *p;
+ page = rb_entry(parent, struct page, rb_page_cache);
+
+ if (offset < page->offset)
+ p = &(*p)->rb_left;
+ else if (offset > page->offset)
+ p = &(*p)->rb_right;
+ else
+ return page;
+ }
+
+ rb_link_node(node, parent, p);
+
+ return NULL;
+}
+
+static inline struct page * rb_insert_page_cache(struct inode * inode,
+ unsigned long offset,
+ rb_node_t * node)
+{
+ struct page * ret;
+ if ((ret = __rb_insert_page_cache(inode, offset, node)))
+ goto out;
+ rb_insert_color(node, &inode->i_rb_page_cache);
+ out:
+ return ret;
+}
+-----------------------------------------------------------------------
+*/
+
+#ifndef _LINUX_RBTREE_H
+#define _LINUX_RBTREE_H
+
+#include <xeno/config.h>
+#include <xeno/lib.h>
+#include <xeno/module.h>
+
+typedef struct rb_node_s
+{
+ struct rb_node_s * rb_parent;
+ int rb_color;
+#define RB_RED 0
+#define RB_BLACK 1
+ struct rb_node_s * rb_right;
+ struct rb_node_s * rb_left;
+}
+rb_node_t;
+
+typedef struct rb_root_s
+{
+ struct rb_node_s * rb_node;
+}
+rb_root_t;
+
+#define RB_ROOT (rb_root_t) { NULL, }
+#define rb_entry(ptr, type, member) \
+ ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
+
+extern void rb_insert_color(rb_node_t *, rb_root_t *);
+extern void rb_erase(rb_node_t *, rb_root_t *);
+
+/*
+ * Attach @node as a new red leaf beneath @parent, storing it in the child
+ * slot @rb_link. The caller must follow up with rb_insert_color().
+ */
+static inline void rb_link_node(rb_node_t * node, rb_node_t * parent, rb_node_t ** rb_link)
+{
+ node->rb_left = NULL;
+ node->rb_right = NULL;
+ node->rb_color = RB_RED;
+ node->rb_parent = parent;
+
+ *rb_link = node;
+}
+
+#endif /* _LINUX_RBTREE_H */
#include <xeno/time.h>
#include <xeno/ac_timer.h>
#include <xeno/delay.h>
+#include <xeno/rbtree.h>
#define MAX_DOMAIN_NAME 16
BLK_RING_IDX blk_resp_prod; /* (private version of) response producer */
struct list_head blkdev_list;
spinlock_t blk_ring_lock;
- vbd_t *vbdtab[VBD_HTAB_SZ]; /* mapping from 16-bit vdevices to vbds */
- spinlock_t vbd_lock;
+ rb_root_t vbd_rb; /* mapping from 16-bit vdevices to vbds */
+ spinlock_t vbd_lock; /* protects VBD mapping */
/* VM */
struct mm_struct mm;
#include <hypervisor-ifs/block.h>
#include <hypervisor-ifs/vbd.h>
-/* an entry in a list of xen_extent's */
+#include <xeno/rbtree.h>
+
+/* An entry in a list of xen_extents. */
typedef struct _xen_extent_le {
- xen_extent_t extent; /* an individual extent */
+ xen_extent_t extent; /* an individual extent */
struct _xen_extent_le *next; /* and a pointer to the next */
} xen_extent_le_t;
-
/*
-** This is what a vbd looks like from the pov of xen: essentially a list
-** of xen_extents which a given domain refers to by a particular 16bit id.
-** Each domain has a hash table to map from these to the relevant VBD.
-*/
+ * This is what a vbd looks like from the p.o.v. of xen: essentially a list of
+ * xen_extents which a given domain refers to by a particular 16bit id. Each
+ * domain has a lookup structure to map from these to the relevant VBD.
+ */
typedef struct _vbd {
unsigned short vdevice; /* what the domain refers to this vbd as */
- unsigned short mode; /* VBD_MODE_{READONLY,READWRITE} */
+ unsigned char mode; /* VBD_MODE_{R,W} */
+ unsigned char type; /* XD_TYPE_xxx */
xen_extent_le_t *extents; /* list of xen_extents making up this vbd */
- struct _vbd *next; /* for chaining in the hash table */
+ rb_node_t rb; /* for linking into R-B tree lookup struct */
} vbd_t;
-#define VBD_HTAB_SZ 16 /* # entries in the vbd hash table. */
+/*
+ * Internal forms of 'vbd_create' and 'vbd_grow'. Used when setting up real
+ * physical device access for domain 0.
+ */
+long __vbd_create(struct task_struct *p,
+ unsigned short vdevice,
+ unsigned char mode,
+ unsigned char type);
+long __vbd_grow(struct task_struct *p,
+ unsigned short vdevice,
+ xen_extent_t *extent);
+/* This is the main API, accessible from guest OSes. */
long vbd_create(vbd_create_t *create_params);
long vbd_grow(vbd_grow_t *grow_params);
long vbd_shrink(vbd_shrink_t *shrink_params);
*
* Xenolinux virtual block-device driver.
*
+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
+ * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
*/
-/* Some modifications to the original by Mark A. Williamson and (C) Intel
- * Research Cambridge */
-
#include "xl_block.h"
#include <linux/blk.h>
#include <linux/cdrom.h>
static BLK_RING_IDX resp_cons; /* Response consumer for comms ring. */
static BLK_RING_IDX req_prod; /* Private request producer. */
-#define XDI_MAX 64
-xen_disk_info_t xlblk_disk_info; /* information about our disks/VBDs */
-
/* We plug the I/O ring if the driver is suspended or if the ring is full. */
#define RING_PLUGGED (((req_prod - resp_cons) == BLK_RING_SIZE) || \
(state != STATE_ACTIVE))
}
+/*
+ * xlblk_update_int/update_vbds_task - handle VBD update events from Xen
+ *
+ * Schedule a task for keventd to run, which will update the VBDs and perform
+ * the corresponding updates to our view of VBD state, so that XenoLinux will
+ * respond to changes / additions / deletions to the set of VBDs automatically.
+ */
+static struct tq_struct update_tq; /* task descriptor passed to schedule_task() */
+static void update_vbds_task(void *unused) /* task body; the argument is ignored */
+{
+ xlvbd_update_vbds();
+}
+static void xlblk_update_int(int irq, void *dev_id, struct pt_regs *ptregs) /* args unused */
+{
+ update_tq.routine = update_vbds_task; /* (re)set the handler before queueing */
+ schedule_task(&update_tq);
+}
+
+
int xenolinux_block_open(struct inode *inode, struct file *filep)
{
short xldev = inode->i_rdev;
}
}
- /* RACE: need locking SMP / pre-emptive kernels */
+ /* Update of usage count is protected by per-device semaphore. */
disk->usage++;
- DPRINTK("xenolinux_block_open\n");
+
return 0;
}
int xenolinux_block_release(struct inode *inode, struct file *filep)
{
xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
- disk->usage--; /* RACE: need locking for SMP / pre-emptive kernels */
- DPRINTK("xenolinux_block_release\n");
-
- /* A reference to a disk has been dropped: may enable more changes to VBDs to
- * go through (currently don't do any updates while references are held), so
- * we run the update magic again. Could equally well schedule this update for
- * keventd to run, or use a flag so we only update at this point if we think
- * something (relevant) may have changed.
- * Keventd has the advantage that it'll serialise executions of this function
- * - there's a race here for SMP / pre-emptive kernels */
- xlvbd_update_vbds();
+
+ /*
+ * When usage drops to zero it may allow more VBD updates to occur.
+ * Update of usage count is protected by a per-device semaphore.
+ */
+ if ( --disk->usage == 0 )
+ {
+ update_tq.routine = update_vbds_task;
+ schedule_task(&update_tq);
+ }
return 0;
}
return 0;
}
-/* MAW - leaving this as it is for now. As long as we're responding to the VBD
- * update events from the hypervisor, I figure this will still do what it's
- * meant to do :-) */
+/*
+ * Clear the per-partition sizes of the unit containing @dev and re-run
+ * grok_partitions() on it. Returns 0 on success, -EINVAL if the device
+ * cannot be resolved or has zero capacity, and -EBUSY if its usage count
+ * is greater than one.
+ */
int xenolinux_block_revalidate(kdev_t dev)
{
- struct gendisk *gd = get_gendisk(dev);
- xl_disk_t *disk = xldev_to_xldisk(dev);
- unsigned long flags, capacity = gd->part[MINOR(dev)].nr_sects;
- int i, disk_nr = MINOR(dev) >> gd->minor_shift;
+ struct block_device *bd;
+ struct gendisk *gd;
+ xl_disk_t *disk;
+ unsigned long capacity;
+ int i, rc = 0, disk_nr;
- DPRINTK("xenolinux_block_revalidate: %d\n", dev);
+ if ( (bd = bdget(dev)) == NULL )
+ return -EINVAL;
/*
- * We didn't construct this VBD by reading a partition table. This
- * function can only do bad things to us.
+ * Update of partition info, and check of usage count, is protected
+ * by the per-block-device semaphore.
*/
- if ( capacity == 0 )
- return -EINVAL;
+ down(&bd->bd_sem);
- spin_lock_irqsave(&io_request_lock, flags);
- if ( disk->usage > 1 )
+ if ( ((gd = get_gendisk(dev)) == NULL) ||
+ ((disk = xldev_to_xldisk(dev)) == NULL) ||
+ ((capacity = gd->part[MINOR(dev)].nr_sects) == 0) )
{
- spin_unlock_irqrestore(&io_request_lock, flags);
- return -EBUSY;
+ rc = -EINVAL;
+ goto out;
}
- spin_unlock_irqrestore(&io_request_lock, flags);
- /* RACE? is it OK that we give up the lock */
+ if ( disk->usage > 1 )
+ {
+ rc = -EBUSY;
+ goto out;
+ }
+
+ /* Only now is gd known to be valid, so compute the unit number here. */
+ disk_nr = MINOR(dev) >> gd->minor_shift;
for ( i = gd->max_p - 1; i >= 0; i-- )
{
gd->sizes[MINOR(dev+i)] = 0;
}
- /* shouldn't need to revalidate VBDs here as it's done automatically when
- * we get the VBD update event from Xen */
-
grok_partitions(gd, disk_nr, gd->max_p, capacity);
- return 0;
+ out:
+ up(&bd->bd_sem);
+ bdput(bd);
+ return rc;
}
}
-/**
- * do_update_vbds - called in process context by keventd to update VBDs
- * @arg: dummy argument to fit schedule_task API
- *
- * When this function is run, it simply calls through to xlvbd_update_vbds in
- * update the VBD state information. The argument is ignored - it's only there
- * because the API for scheduling with keventd requires it.
- */
-void do_update_vbds(void * arg)
-{
- DPRINTK("xl_block.c::do_update_vbds() - called\n");
- xlvbd_update_vbds();
-}
-
-/* this data is needed to register do_update_vbds() as a task for keventd */
-static struct tq_struct update = {
- .sync = 0,
- .routine = do_update_vbds,
- .data = 0
-};
-
-/**
- * xlblk_update_int - handle VBD update events from Xen
- *
- * This function schedules a task for keventd to run, which will update the
- * VBDs and perform the corresponding updates to our view of VBD state, so the
- * XenoLinux will respond to changes / additions / deletions to the set of VBDs
- * automatically.
- */
-static void xlblk_update_int(int irq, void *dev_id, struct pt_regs *ptregs)
-{
- DPRINTK("xl_block.c::xlblk_update_int() - called\n");
-
- schedule_task(&update);
-}
-
-
static void xlblk_response_int(int irq, void *dev_id, struct pt_regs *ptregs)
{
BLK_RING_IDX i;
}
-
static void reset_xlblk_interface(void)
{
block_io_op_t op;
int __init xlblk_init(void)
{
int error;
- block_io_op_t op;
reset_xlblk_interface();
goto fail;
}
- /* Setup our [empty] disk information structure */
- xlblk_disk_info.max = XDI_MAX;
- xlblk_disk_info.disks = kmalloc(XDI_MAX * sizeof(xen_disk_t), GFP_KERNEL);
- xlblk_disk_info.count = 0;
-
- /* Probe for disk information. */
- memset(&op, 0, sizeof(op));
- op.cmd = BLOCK_IO_OP_VBD_PROBE;
- op.u.probe_params.domain = 0;
- memcpy(&op.u.probe_params.xdi, &xlblk_disk_info, sizeof(xlblk_disk_info));
-
- error = HYPERVISOR_block_io_op(&op);
-
- if ( error )
- {
- printk(KERN_ALERT "Could not probe disks (%d)\n", error);
- free_irq(XLBLK_RESPONSE_IRQ, NULL);
- goto fail;
- }
-
- /* copy back the [updated] count parameter */
- xlblk_disk_info.count = op.u.probe_params.xdi.count;
-
- /* Pass the information to our virtual block device susbystem. */
- xlvbd_init(&xlblk_disk_info);
+ (void)xlvbd_init();
return 0;
return error;
}
+
static void __exit xlblk_cleanup(void)
{
xlvbd_cleanup();
int usage;
} xl_disk_t;
-/* Generic layer. */
extern int xenolinux_control_msg(int operration, char *buffer, int size);
extern int xenolinux_block_open(struct inode *inode, struct file *filep);
extern int xenolinux_block_release(struct inode *inode, struct file *filep);
extern int xenolinux_block_revalidate(kdev_t dev);
extern void do_xlblk_request (request_queue_t *rq);
-extern xen_disk_info_t xlblk_disk_info; /* this is really in xl_block.c */
-extern void xlvbd_update_vbds(void); /* this is really in xl_vbd.c */
+extern void xlvbd_update_vbds(void);
static inline xl_disk_t *xldev_to_xldisk(kdev_t xldev)
{
struct gendisk *gd = get_gendisk(xldev);
-
- if(!gd) return NULL;
-
+
+ if ( gd == NULL )
+ return NULL;
+
return (xl_disk_t *)gd->real_devices +
(MINOR(xldev) >> gd->minor_shift);
}
/* Virtual block-device subsystem. */
-extern int xlvbd_init(xen_disk_info_t *xdi);
+extern int xlvbd_init(void);
extern void xlvbd_cleanup(void);
#endif /* __XL_BLOCK_H__ */
*
* Xenolinux virtual block-device driver (xvd).
*
+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
+ * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
*/
-/* Some modifications to the original by Mark A. Williamson and (C) Intel
- * Research Cambridge */
-
#include "xl_block.h"
#include <linux/blk.h>
#define XLSCSI_PARTN_SHIFT 4 /* amount to shift minor to get 'real' minor */
#define XLSCSI_MAX_PART (1 << XLSCSI_PARTN_SHIFT) /* minors per scsi vbd */
-#define XLVBD_PARTN_SHIFT 6 /* amount to shift minor to get 'real' minor */
+#define XLVBD_PARTN_SHIFT 4 /* amount to shift minor to get 'real' minor */
#define XLVBD_MAX_PART (1 << XLVBD_PARTN_SHIFT) /* minors per 'other' vbd */
-/* Used to record data in vbd_state[] and detect changes in configuration */
-#define VBD_NODEV 1
-#define VBD_KNOWN 2
-
/* The below are for the generic drivers/block/ll_rw_block.c code. */
static int xlide_blksize_size[256];
static int xlide_hardsect_size[256];
static int xlvbd_hardsect_size[256];
static int xlvbd_max_sectors[256];
+/* Information from Xen about our VBDs. */
+#define MAX_VBDS 64
+static int nr_vbds;
+static xen_disk_t *vbd_info;
+
static struct block_device_operations xlvbd_block_fops =
{
open: xenolinux_block_open,
revalidate: xenolinux_block_revalidate,
};
- /* hold state about for all possible VBDs for use in handling updates */
-static char vbd_state[65536];
+/*
+ * Issue a BLOCK_IO_OP_VBD_PROBE request to Xen.
+ * @disk_info: array handed to Xen as the probe result buffer; the request
+ * advertises room for MAX_VBDS entries.
+ * Returns the entry count reported back in the request, or -1 if the buffer
+ * is missing or the hypercall returns an error.
+ */
+static int xlvbd_get_vbd_info(xen_disk_t *disk_info)
+{
+ int error;
+ block_io_op_t op;
+
+ /* Callers pass an unchecked kmalloc() result; never hand Xen a NULL. */
+ if ( disk_info == NULL )
+ return -1;
+
+ /* Probe for disk information. */
+ memset(&op, 0, sizeof(op));
+ op.cmd = BLOCK_IO_OP_VBD_PROBE;
+ op.u.probe_params.domain = 0;
+ op.u.probe_params.xdi.max = MAX_VBDS;
+ op.u.probe_params.xdi.disks = disk_info;
+ op.u.probe_params.xdi.count = 0;
+
+ if ( (error = HYPERVISOR_block_io_op(&op)) != 0 )
+ {
+ printk(KERN_ALERT "Could not probe disks (%d)\n", error);
+ return -1;
+ }
+
+ return op.u.probe_params.xdi.count;
+}
-/**
+/*
* xlvbd_init_device - initialise a VBD device
* @disk: a xen_disk_t describing the VBD
*
* corruption does not occur. Also, devices that are in use should not have
* their details updated. This is the caller's responsibility.
*/
-int xlvbd_init_device(xen_disk_t *disk)
+static int xlvbd_init_device(xen_disk_t *xd)
{
- int device = disk->device;
+ int device = xd->device;
int major = MAJOR(device);
int minor = MINOR(device);
int is_ide = IDE_DISK_MAJOR(major); /* is this an ide device? */
int is_scsi= SCSI_BLK_MAJOR(major); /* is this a scsi device? */
- int partno;
- char * major_name;
- int max_part;
-
+ char *major_name;
struct gendisk *gd;
- int result;
- int j;
+ struct block_device *bd;
+ xl_disk_t *disk;
+ int i, rc = 0, max_part, partno;
unsigned char buf[64];
+ if ( (bd = bdget(device)) == NULL )
+ return -1;
+
+ /*
+ * Update of partition info, and check of usage count, is protected
+ * by the per-block-device semaphore.
+ */
+ down(&bd->bd_sem);
+
+ if ( ((disk = xldev_to_xldisk(device)) != NULL) && (disk->usage != 0) )
+ {
+ printk(KERN_ALERT "VBD update failed - in use [dev=%x]\n", device);
+ rc = -1;
+ goto out;
+ }
+
if ( is_ide )
{
major_name = XLIDE_MAJOR_NAME;
if ( (gd = get_gendisk(device)) == NULL )
{
- result = register_blkdev(major, major_name, &xlvbd_block_fops);
- if ( result < 0 )
+ rc = register_blkdev(major, major_name, &xlvbd_block_fops);
+ if ( rc < 0 )
{
printk(KERN_ALERT "XL VBD: can't get major %d\n", major);
- return -1; /* XXX make this sane one day */
+ goto out;
}
if ( is_ide )
blk_size[major] = gd->sizes;
}
- if ( XD_READONLY(disk->info) )
+ if ( XD_READONLY(xd->info) )
set_device_ro(device, 1);
gd->flags[minor >> gd->minor_shift] |= GENHD_FL_XENO;
if ( gd->sizes[minor & ~(max_part-1)] != 0 )
{
kdev_t dev = device & ~(max_part-1);
- for ( j = max_part - 1; j >= 0; j-- )
+ for ( i = max_part - 1; i >= 0; i-- )
{
- invalidate_device(dev+j, 1);
- gd->part[MINOR(dev+j)].start_sect = 0;
- gd->part[MINOR(dev+j)].nr_sects = 0;
- gd->sizes[MINOR(dev+j)] = 0;
-
- vbd_state[dev+j] &= ~VBD_KNOWN;
+ invalidate_device(dev+i, 1);
+ gd->part[MINOR(dev+i)].start_sect = 0;
+ gd->part[MINOR(dev+i)].nr_sects = 0;
+ gd->sizes[MINOR(dev+i)] = 0;
}
printk(KERN_ALERT
"Virtual partitions found for /dev/%s - ignoring any "
/* Need to skankily setup 'partition' information */
gd->part[minor].start_sect = 0;
- gd->part[minor].nr_sects = disk->capacity;
- gd->sizes[minor] = disk->capacity;
+ gd->part[minor].nr_sects = xd->capacity;
+ gd->sizes[minor] = xd->capacity;
gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS;
-
- vbd_state[device] |= VBD_KNOWN;
}
else
{
/* Some final fix-ups depending on the device type */
- switch ( XD_TYPE(disk->info) )
+ switch ( XD_TYPE(xd->info) )
{
case XD_TYPE_CDROM:
case XD_TYPE_FLOPPY:
case XD_TYPE_TAPE:
- gd->part[minor].nr_sects = disk->capacity;
- gd->sizes[minor] = disk->capacity>>(BLOCK_SIZE_BITS-9);
+ gd->part[minor].nr_sects = xd->capacity;
+ gd->sizes[minor] = xd->capacity>>(BLOCK_SIZE_BITS-9);
gd->flags[minor >> gd->minor_shift] |= GENHD_FL_REMOVABLE;
printk(KERN_ALERT
"Skipping partition check on %s /dev/%s\n",
- XD_TYPE(disk->info)==XD_TYPE_CDROM ? "cdrom" :
- (XD_TYPE(disk->info)==XD_TYPE_TAPE ? "tape" :
+ XD_TYPE(xd->info)==XD_TYPE_CDROM ? "cdrom" :
+ (XD_TYPE(xd->info)==XD_TYPE_TAPE ? "tape" :
"floppy"), disk_name(gd, MINOR(device), buf));
-
- vbd_state[device] |= VBD_KNOWN; /* remember the VBD is there now */
break;
case XD_TYPE_DISK:
break;
}
register_disk(gd, device, gd->max_p, &xlvbd_block_fops,
- disk->capacity);
-
- vbd_state[device] |= VBD_KNOWN; /* remember the VBD is there now */
-
+ xd->capacity);
break;
default:
printk(KERN_ALERT "XenoLinux: unknown device type %d\n",
- XD_TYPE(disk->info));
+ XD_TYPE(xd->info));
break;
}
}
- printk(KERN_ALERT "XenoLinux Virtual Block Device Driver "
- "installed [device: %04x]\n", device);
-
- return 0;
+ out:
+ up(&bd->bd_sem);
+ bdput(bd);
+ return rc;
}
-/**
- * xlvbd_remove - see if a VBD should be removed and do so if appropriate
+/*
+ * xlvbd_remove_device - remove a device node if possible
* @device: numeric device ID
*
* Updates the gendisk structure and invalidates devices.
*
* This is OK for now but in future, should perhaps consider where this should
- * deallocate gendisks / unregister devices?
+ * deallocate gendisks / unregister devices.
*/
-int xlvbd_remove(int device)
+static int xlvbd_remove_device(int device)
{
- int major = MAJOR(device);
- int minor = MINOR(device);
- int is_ide = IDE_DISK_MAJOR(major); /* is this an ide device? */
- int is_scsi= SCSI_BLK_MAJOR(major); /* is this a scsi device? */
- int i; /* loop counter */
- int partno;
- int max_part;
- char * major_name;
-
+ int i, rc = 0, max_part, minor = MINOR(device);
struct gendisk *gd;
+ struct block_device *bd;
+ xl_disk_t *disk;
- DPRINTK("xl_vbd.c::xlvbd_remove() - Removing a VBD\n");
-
- /* if device is in use then we shouldn't change its settings */
- if(xldev_to_xldisk(device)->usage)
- {
- DPRINTK("xl_vbd.c::xlvbd_remove() - VBD in use, could not remove\n");
- printk(KERN_ALERT "Removing XenoLinux VBD failed - "
- "in use [device: %x]\n", device);
+ if ( (bd = bdget(device)) == NULL )
return -1;
- }
- if((gd = get_gendisk(device)) == NULL)
+ /*
+ * Update of partition info, and check of usage count, is protected
+ * by the per-block-device semaphore.
+ */
+ down(&bd->bd_sem);
+
+ if ( ((gd = get_gendisk(device)) == NULL) ||
+ ((disk = xldev_to_xldisk(device)) == NULL) )
+ BUG();
+
+ if ( disk->usage != 0 )
{
- printk(KERN_ALERT
- "xl_vbd.c::xlvbd_remove() - ERROR could not get gendisk\n");
-
- return -1;
+ printk(KERN_ALERT "VBD removal failed - in use [dev=%x]\n", device);
+ rc = -1;
+ goto out;
}
- if ( is_ide )
- {
- major_name = XLIDE_MAJOR_NAME;
- max_part = XLIDE_MAX_PART;
- }
- else if ( is_scsi )
- {
- major_name = XLSCSI_MAJOR_NAME;
- max_part = XLSCSI_MAX_PART;
+ if ( IDE_DISK_MAJOR(MAJOR(device)) )
+ max_part = XLIDE_MAX_PART;
+ else if ( SCSI_BLK_MAJOR(MAJOR(device)) )
+ max_part = XLSCSI_MAX_PART;
+ else
+ max_part = XLVBD_MAX_PART;
+
+ if ( (minor & (max_part-1)) != 0 )
+ {
+ /* 1: The VBD is mapped to a partition rather than a whole unit. */
+ invalidate_device(device, 1);
+ gd->part[minor].start_sect = 0;
+ gd->part[minor].nr_sects = 0;
+ gd->sizes[minor] = 0;
+
+ /* Clear the consists-of-virtual-partitions flag if possible. */
+ gd->flags[minor >> gd->minor_shift] &= ~GENHD_FL_VIRT_PARTNS;
+ for ( i = 0; i < max_part; i++ )
+ if ( gd->sizes[(minor & ~(max_part-1)) + i] != 0 )
+ gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS;
}
else
- {
- major_name = XLVBD_MAJOR_NAME;
- max_part = XLVBD_MAX_PART;
+ {
+ /* 2: The VBD is mapped to an entire 'unit'. Clear all partitions. */
+ for ( i = max_part - 1; i >= 0; i-- )
+ {
+ invalidate_device(device+i, 1);
+ gd->part[minor+i].start_sect = 0;
+ gd->part[minor+i].nr_sects = 0;
+ gd->sizes[minor+i] = 0;
+ }
}
- partno = minor & (max_part - 1);
+ out:
+ up(&bd->bd_sem);
+ bdput(bd);
+ return rc;
+}
+
+/*
+ * xlvbd_update_vbds - re-probe the VBD status and update the driver state
+ * accordingly. The VBDs need to be updated in this way when the domain is
+ * initialised and also each time we receive an XLBLK_UPDATE event.
+ *
+ * If the probe or a memory allocation fails, the previously recorded VBD
+ * list is left untouched.
+ */
+void xlvbd_update_vbds(void)
+{
+ int i, j, k, old_nr, new_nr;
+ xen_disk_t *old_info, *new_info, *merged_info;
- DPRINTK("Got partno = 0x%x\n", partno);
+ old_info = vbd_info;
+ old_nr = nr_vbds;
- if(partno) /* if the VBD is mapped to a "partition" device node in Linux */
+ new_info = kmalloc(MAX_VBDS * sizeof(xen_disk_t), GFP_KERNEL);
+ if ( new_info == NULL )
+ return;
+ if ( unlikely((new_nr = xlvbd_get_vbd_info(new_info)) < 0) )
{
- int should_clear_virtpart = 1; /* if this is set true we should clear
- * the GENHD_FL_VIRT_PARTNS flag in the
- * gendisk */
-
- gd->sizes[minor] = 0;
+ kfree(new_info);
+ return;
+ }
- for(i = 0; i < max_part; i++)
- if(gd->sizes[minor - partno + i]) should_clear_virtpart = 0;
-
- /* if there aren't any virtual partitions here then clear the flag for
- * this unit */
- if(should_clear_virtpart)
- {
- gd->flags[minor >> gd->minor_shift] &= ~GENHD_FL_VIRT_PARTNS;
+ /*
+ * Final list maximum size is old list + new list. This occurs only when
+ * old list and new list do not overlap at all, and we cannot yet destroy
+ * VBDs in the old list because the usage counts are busy.
+ */
+ merged_info = kmalloc((old_nr + new_nr) * sizeof(xen_disk_t), GFP_KERNEL);
+ if ( merged_info == NULL )
+ {
+ kfree(new_info);
+ return;
+ }
- DPRINTK("xl_vbd.c::xlvbd_remove() - "
- "cleared virtual partition flag\n");
- }
-
- gd->part[MINOR(device)].start_sect = 0;
- gd->part[MINOR(device)].nr_sects = 0;
- gd->sizes[MINOR(device)] = 0;
-
- invalidate_device(device, 1);
+ /*
+ * @i tracks old list; @j tracks new list; @k tracks merged list.
+ * NOTE(review): the merge assumes both lists are ordered by ascending
+ * device number - confirm that the probe returns them in that order.
+ */
+ i = j = k = 0;
- vbd_state[device] &= ~VBD_KNOWN; /* forget VBD was ever there */
- }
- else /* the VBD is mapped to a "whole disk drive" device node in Linux */
+ while ( (i < old_nr) && (j < new_nr) )
{
- for ( i = max_part - 1; i >= 0; i-- )
+ if ( old_info[i].device < new_info[j].device )
{
- invalidate_device(device+i, 1);
- gd->part[MINOR(device+i)].start_sect = 0;
- gd->part[MINOR(device+i)].nr_sects = 0;
- gd->sizes[MINOR(device+i)] = 0;
-
- vbd_state[device+i] &= ~VBD_KNOWN; /* forget VBD was ever there */
+ if ( xlvbd_remove_device(old_info[i].device) != 0 )
+ memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t));
+ i++;
+ }
+ else if ( old_info[i].device > new_info[j].device )
+ {
+ if ( xlvbd_init_device(&new_info[j]) == 0 )
+ memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t));
+ j++;
+ }
+ else
+ {
+ if ( xlvbd_init_device(&new_info[j]) == 0 )
+ memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t));
+ else
+ memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t));
+ i++; j++;
}
}
- printk(KERN_ALERT "XenoLinux Virtual Block Device removed "
- " [device: %04x]\n", device);
- return 0;
+ for ( ; i < old_nr; i++ )
+ {
+ if ( xlvbd_remove_device(old_info[i].device) != 0 )
+ memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t));
+ }
+
+ for ( ; j < new_nr; j++ )
+ {
+ if ( xlvbd_init_device(&new_info[j]) == 0 )
+ memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t));
+ }
+
+ vbd_info = merged_info;
+ nr_vbds = k;
+
+ kfree(old_info);
+ kfree(new_info);
}
+
/*
* Set up all the linux device goop for the virtual block devices (vbd's) that
* xen tells us about. Note that although from xen's pov VBDs are addressed
* linux -- this is just for convenience as it means e.g. that the same
* /etc/fstab can be used when booting with or without xen.
*/
-int __init xlvbd_init(xen_disk_info_t *xdi)
+int __init xlvbd_init(void)
{
- int i; /* loop counter */
+ int i;
+ /*
+ * If compiled as a module, we don't support unloading yet. We therefore
+ * permanently increment the reference count to disallow it.
+ */
SET_MODULE_OWNER(&xlvbd_block_fops);
+ MOD_INC_USE_COUNT;
/* Initialize the global arrays. */
-
- for( i = 0; i < 65536; i++)
- vbd_state[i] = VBD_NODEV;
-
for ( i = 0; i < 256; i++ )
{
/* from the generic ide code (drivers/ide/ide-probe.c, etc) */
xlvbd_max_sectors[i] = 128;
}
- /*
- * We need to loop through each major device we've been told about and:
- * a) register the appropriate blkdev
- * b) setup the indexed-by-major global arrays (blk_size[],
- * blksize_size[], hardsect_size[], max_sectors[], read_ahead[])
- * c) setup the block queue + make it sensible
- * d) create an appropriate gendisk structure, and
- * e) register the gendisk
- */
- for ( i = 0; i < xdi->count; i++ )
- {
- xlvbd_init_device(&xdi->disks[i]);
- }
-
- return 0;
-}
-
-/**
- * xlvbd_update_vbds - reprobes the VBD status and performs updates driver state
- *
- * The VBDs need to be updated in this way when the domain is initialised and
- * also each time we receive an XLBLK_UPDATE event.
- *
- * The vbd_state array is consistent on entry to and exit from this function but
- * not whilst the function runs, so this should not be called re-entrantly.
- */
-void xlvbd_update_vbds(void)
-{
- int i; /* loop counter */
- int ret; /* return values */
- block_io_op_t op; /* for talking to Xen */
-
- xen_disk_info_t *xdi = &xlblk_disk_info; /* pointer to structures in
- * xl_block.c */
-
- /* Probe for disk information. */
- memset(&op, 0, sizeof(op));
- op.cmd = BLOCK_IO_OP_VBD_PROBE;
- op.u.probe_params.domain = 0;
-
- xdi->count = 0; /* need to keep resetting this to zero because the probe
- * will append results after "used" space in the array */
-
- memcpy(&op.u.probe_params.xdi, &xlblk_disk_info, sizeof(xlblk_disk_info));
+ vbd_info = kmalloc(MAX_VBDS * sizeof(xen_disk_t), GFP_KERNEL);
+ nr_vbds = xlvbd_get_vbd_info(vbd_info);
- ret = HYPERVISOR_block_io_op(&op);
-
- if ( ret )
+ if ( nr_vbds < 0 )
{
- printk(KERN_ALERT "Could not probe disks (%d)\n", ret);
+ kfree(vbd_info);
+ vbd_info = NULL;
+ nr_vbds = 0;
}
-
- /* copy back the [updated] count parameter */
- xlblk_disk_info.count = op.u.probe_params.xdi.count;
-
- DPRINTK("Retrieved %d disks\n",op.u.probe_params.xdi.count);
-
-
- for( i = 0; i < 65536; i++ )
- vbd_state[i] |= VBD_NODEV;
-
- for( i = 0; i < xdi->count; i++ )
- {
- int device = xdi->disks[i].device;
- xl_disk_t *d;
-
- vbd_state[device] &= ~VBD_NODEV;
-
- DPRINTK("Inspecting xen_disk_t: device = %hx, info = %hx, "
- "capacity = %lx, domain = %d\n",
- xdi->disks[i].device, xdi->disks[i].info, xdi->disks[i].capacity,
- xdi->disks[i].domain);
-
- if(xdi->disks[i].info & XD_FLAG_VIRT)
- {
- /* RACE: need to fix this for SMP / pre-emptive kernels */
-
- d = xldev_to_xldisk(device);
-
- /* only go on to monkey with this stuff if we successfully got the
- * xldisk and it says no-one else is using the disk OR if we didn't
- * successfully retrieve the xldisk (so it doesn't exist and nobody
- * can be using it), otherwise skip on to the next device */
- if(d != NULL && d->usage > 0)
- {
- printk(KERN_ALERT "XenoLinux VBD Driver: "
- "skipping update in a disk currently in use");
- DPRINTK("Usage = %d\n", d->usage);
- continue; /* skip to next device */
- }
-
- printk(KERN_ALERT "XenoLinux VBD Driver: updating a VBD "
- "[device: %x]\n", device);
- /* also takes care of any overrides (i.e. due to VBDs mapped to
- * partitions overriding VBDs mapped to disks) and of registering
- * disks */
- xlvbd_init_device(xdi->disks + i);
- }
-
- }
-
- for( i = 0; i < 65536; i++ )
+ else
{
- switch(vbd_state[i])
- {
- case VBD_NODEV | VBD_KNOWN: /* a VBD we knew about before has gone */
-
- DPRINTK("About to remove VBD 0x%x\n",i);
-
- ret = xlvbd_remove(i);
-
- if(ret) DPRINTK("Failed to remove VBD\n");
-
- break;
-
- case VBD_NODEV: /* there's nothing here and there wasn't anything
- * before */
- break;
-
- case VBD_KNOWN: /* the device is present and it's set up */
- break;
-
- case 0: /* there's a device present we haven't set up - either
- * one of the "non virtual" VBDs or we weren't able to
- * update it because it was mounted */
- break;
-
- default: /* if there's any other weird combination, something
- * unexpected is happening */
- printk(KERN_ALERT "xl_vbd.c::xlvbd_update_vbds: BUG - Unknown state "
- "when updating VBDs: 0x%x\n", vbd_state[i]);
- }
+ for ( i = 0; i < nr_vbds; i++ )
+ xlvbd_init_device(&vbd_info[i]);
}
+ return 0;
}
-void xlvbd_cleanup(void)
-{
- int is_ide, is_scsi, i;
- struct gendisk *gd;
- char *major_name;
- int major;
-
- for ( major = 0; major < MAX_BLKDEV; major++ )
- {
- if ( (gd = get_gendisk(MKDEV(major, 0))) == NULL )
- continue;
-
- /*
- * If this is a 'Xeno' blkdev then at least one unit will have the Xeno
- * flag set.
- */
- for ( i = 0; i < gd->nr_real; i++ )
- if ( gd->flags[i] & GENHD_FL_XENO )
- break;
- if ( i == gd->nr_real )
- continue;
-
- is_ide = IDE_DISK_MAJOR(major); /* is this an ide device? */
- is_scsi = SCSI_BLK_MAJOR(major); /* is this a scsi device? */
-
- blk_cleanup_queue(BLK_DEFAULT_QUEUE(major));
-
- if ( is_ide )
- major_name = XLIDE_MAJOR_NAME;
- else if ( is_scsi )
- major_name = XLSCSI_MAJOR_NAME;
- else
- major_name = XLVBD_MAJOR_NAME;
-
- if ( unregister_blkdev(major, major_name) != 0 )
- printk(KERN_ALERT "XenoLinux Virtual Block Device Driver:"
- "major device %04x uninstalled w/ errors\n", major);
-
- /* XXX shouldn't we remove the gendisk from the kernel linked list and
- * deallocate the memory here? */
- }
-}
#ifdef MODULE
module_init(xlvbd_init);
-module_exit(xlvbd_cleanup);
#endif
__asm__ __volatile__ (
TRAP_INSTR
: "=a" (ret) : "0" (__HYPERVISOR_net_io_op),
- "b" (op) );
+ "b" (op) : "memory" );
return ret;
}
__asm__ __volatile__ (
TRAP_INSTR
: "=a" (ret) : "0" (__HYPERVISOR_network_op),
- "b" (network_op) );
+ "b" (network_op) : "memory" );
return ret;
}
-static inline int HYPERVISOR_block_io_op(void * block_io_op)
+static inline int HYPERVISOR_block_io_op(void *block_io_op) /* traps with the op pointer in "b"; returns the "a" result */
{
int ret;
__asm__ __volatile__ (
TRAP_INSTR
: "=a" (ret) : "0" (__HYPERVISOR_block_io_op),
- "b" (block_io_op) );
+ "b" (block_io_op) : "memory" ); /* "memory" clobber: compiler must not cache memory across the trap */
return ret;
}